package com.hdb.downloder;
/*
 * Custom downloader that works around the inability to crawl
 * HTTPS sites that support only TLS 1.2.
 */
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.io.IOUtils;
import org.apache.http.HttpResponse;
import org.apache.http.annotation.ThreadSafe;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.downloader.AbstractDownloader;
import us.codecraft.webmagic.downloader.HttpClientRequestContext;
import us.codecraft.webmagic.downloader.HttpUriRequestConverter;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.ProxyProvider;
import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.CharsetUtils;
import us.codecraft.webmagic.utils.HttpClientUtils;


	/**
	 * Downloader that fetches pages through Apache HttpClient.
	 *
	 * <p>One {@link CloseableHttpClient} is created per site domain by the
	 * {@code HttpClientGenerator} of this package and cached for later requests.
	 * An optional {@link ProxyProvider} supplies a proxy for each download and gets
	 * it back once the download has finished.
	 */
	@ThreadSafe
	public class HttpClientDownloader extends AbstractDownloader {

	    private final Logger logger = LoggerFactory.getLogger(getClass());

	    /** Cached clients keyed by site domain; every access is guarded by the map's own monitor. */
	    private final Map<String, CloseableHttpClient> httpClients = new HashMap<String, CloseableHttpClient>();

	    private final HttpClientGenerator httpClientGenerator = new HttpClientGenerator();

	    private HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
	    
	    private ProxyProvider proxyProvider;

	    /** When true, response headers are copied onto the page. No setter is defined in this class. */
	    private boolean responseHeader = true;

	    /**
	     * Replaces the converter used to turn a {@link Request} into an HttpClient request.
	     *
	     * @param httpUriRequestConverter converter to use for subsequent downloads
	     */
	    public void setHttpUriRequestConverter(HttpUriRequestConverter httpUriRequestConverter) {
	        this.httpUriRequestConverter = httpUriRequestConverter;
	    }

	    /**
	     * Sets the provider that supplies a proxy for each download.
	     *
	     * @param proxyProvider provider to use, or {@code null} to download without a proxy
	     */
	    public void setProxyProvider(ProxyProvider proxyProvider) {
	        this.proxyProvider = proxyProvider;
	    }

	    /**
	     * Returns the client for the given site, creating and caching it on first use.
	     *
	     * @param site site whose domain selects the cached client; may be {@code null}
	     * @return the cached client for the site's domain, or an uncached client when
	     *         {@code site} is {@code null}
	     */
	    private CloseableHttpClient getHttpClient(Site site) {
	        if (site == null) {
	            return httpClientGenerator.getClient(null);
	        }
	        String domain = site.getDomain();
	        // HashMap must not be read while another thread may be inserting, so the
	        // lookup and the insertion both happen under the same lock.
	        synchronized (httpClients) {
	            CloseableHttpClient httpClient = httpClients.get(domain);
	            if (httpClient == null) {
	                httpClient = httpClientGenerator.getClient(site);
	                httpClients.put(domain, httpClient);
	            }
	            return httpClient;
	        }
	    }

	    /**
	     * Downloads the page addressed by {@code request}.
	     *
	     * <p>On success {@code onSuccess(request)} is invoked and the populated page is
	     * returned. When an {@link IOException} occurs it is logged,
	     * {@code onError(request)} is invoked and the page obtained from
	     * {@link Page#fail()} is returned instead. In both cases the response entity is
	     * consumed and the proxy, if any, is handed back to the provider.
	     *
	     * @param request request to execute
	     * @param task    task supplying the {@link Site} configuration
	     * @return the downloaded page, or the {@link Page#fail()} page after an I/O error
	     * @throws NullPointerException if {@code task} or its site is {@code null}
	     */
	    public Page download(Request request, Task task) {
	        if (task == null || task.getSite() == null) {
	            throw new NullPointerException("task or site can not be null");
	        }
	        CloseableHttpResponse httpResponse = null;
	        CloseableHttpClient httpClient = getHttpClient(task.getSite());
	        Proxy proxy = proxyProvider != null ? proxyProvider.getProxy(task) : null;
	        HttpClientRequestContext requestContext = httpUriRequestConverter.convert(request, task.getSite(), proxy);
	        Page page = Page.fail();
	        try {
	            httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext());
	            // A charset set on the request takes precedence over the site-wide charset
	            page = handleResponse(request, request.getCharset() != null ? request.getCharset() : task.getSite().getCharset(), httpResponse, task);
	            onSuccess(request);
	            logger.info("downloading page success {}", request.getUrl());
	            return page;
	        } catch (IOException e) {
	            logger.warn("download page {} error", request.getUrl(), e);
	            onError(request);
	            return page;
	        } finally {
	            if (httpResponse != null) {
	                //ensure the connection is released back to pool
	                EntityUtils.consumeQuietly(httpResponse.getEntity());
	            }
	            if (proxyProvider != null && proxy != null) {
	                proxyProvider.returnProxy(proxy, page, task);
	            }
	        }
	    }

	    /**
	     * Forwards the thread count to the client generator as its pool size.
	     *
	     * @param thread number of threads, used as the pool size
	     */
	    public void setThread(int thread) {
	        httpClientGenerator.setPoolSize(thread);
	    }

	    /**
	     * Builds a {@link Page} from an HTTP response.
	     *
	     * <p>The body is always stored as raw bytes. Unless the request is flagged as
	     * binary content, it is also decoded to text using {@code charset}, or a
	     * detected charset when {@code charset} is {@code null}. A response without an
	     * entity is treated as having an empty body.
	     *
	     * @param request      request that produced the response
	     * @param charset      charset to decode the body with, or {@code null} to detect it
	     * @param httpResponse response to read
	     * @param task         task the request belongs to (not used by this implementation)
	     * @return page marked as successfully downloaded
	     * @throws IOException if the body cannot be read or the charset is not supported
	     */
	    protected Page handleResponse(Request request, String charset, HttpResponse httpResponse, Task task) throws IOException {
	        // getEntity() is null when the response carries no body; use an empty body
	        // rather than failing with a NullPointerException.
	        boolean hasEntity = httpResponse.getEntity() != null;
	        byte[] bytes = hasEntity ? IOUtils.toByteArray(httpResponse.getEntity().getContent()) : new byte[0];
	        String contentType = "";
	        if (hasEntity && httpResponse.getEntity().getContentType() != null) {
	            contentType = httpResponse.getEntity().getContentType().getValue();
	        }
	        Page page = new Page();
	        page.setBytes(bytes);
	        if (!request.isBinaryContent()){
	            if (charset == null) {
	                charset = getHtmlCharset(contentType, bytes);
	            }
	            page.setCharset(charset);
	            page.setRawText(new String(bytes, charset));
	        }
	        page.setUrl(new PlainText(request.getUrl()));
	        page.setRequest(request);
	        page.setStatusCode(httpResponse.getStatusLine().getStatusCode());
	        page.setDownloadSuccess(true);
	        if (responseHeader) {
	            page.setHeaders(HttpClientUtils.convertHeaders(httpResponse.getAllHeaders()));
	        }
	        return page;
	    }

	    /**
	     * Detects the charset of a response body, falling back to the JVM default
	     * charset (with a warning) when detection yields nothing.
	     *
	     * @param contentType  value of the Content-Type header, or an empty string
	     * @param contentBytes raw response body
	     * @return name of the detected charset, or of the default charset
	     * @throws IOException if charset detection fails
	     */
	    private String getHtmlCharset(String contentType, byte[] contentBytes) throws IOException {
	        String charset = CharsetUtils.detectCharset(contentType, contentBytes);
	        if (charset == null) {
	            charset = Charset.defaultCharset().name();
	            logger.warn("Charset autodetect failed, use {} as charset. Please specify charset in Site.setCharset()", Charset.defaultCharset());
	        }
	        return charset;
	    }
	}


